Unicellular enrichment profiles

After a question in the TAC about whether the same genes drive the TAI, I decided to use pMatrix and check which genes drive the high (young) TAI in the gametes.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(myTAI)
library(see)

Load PES

Non-transformed

Ec_PES_32m <-
  readr::read_csv(file = "data/Ec_PES_32m.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f <-
  readr::read_csv(file = "data/Ec_PES_25f.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

sqrt-tranformed

Ec_PES_32m.sqrt <-
  readr::read_csv(file = "data/Ec_PES_32m.sqrt.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.sqrt <-
  readr::read_csv(file = "data/Ec_PES_25f.sqrt.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

log2-tranformed

Ec_PES_32m.log2 <-
  readr::read_csv(file = "data/Ec_PES_32m.log2.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.log2 <-
  readr::read_csv(file = "data/Ec_PES_25f.log2.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

rank-tranformed

Ec_PES_32m.rank <-
  readr::read_csv(file = "data/Ec_PES_32m.rank.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.rank <-
  readr::read_csv(file = "data/Ec_PES_25f.rank.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

rlog-tranformed

Ec_PES_32m.rlog <-
  readr::read_csv(file = "data/Ec_PES_32m.rlog.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.rlog <-
  readr::read_csv(file = "data/Ec_PES_25f.rlog.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Select unicellular stages

unicellular_pMat <- function(PES, unicell_names = c("meiospore", "gamete", "mitospore"), dropzero = F, ordered_stages, top_genes = F, n_genes = 100, stage_select = "gamete"){
        
        if(!is.data.frame(PES))
                stop("PES is not a dataframe")
        
        stage_select <- rlang::sym(stage_select)
        
        PES_filt <- PES %>% 
                dplyr::select(1, 2, all_of(unicell_names))
        
        if(dropzero){
                PES_filt <- PES_filt %>%
                        dplyr::filter(rowSums(!select(., all_of(unicell_names))) == 0)
        }
        
                
        PES_filt <- PES_filt %>%
                myTAI::pMatrix() %>%
                tibble::as_tibble(rownames = "GeneID") 
        
        # in case one wants to select the top N genes.
        if(top_genes){
                PES_filt <- PES_filt %>%
                        dplyr::slice_max({{stage_select}}, n = n_genes)
        }
        
        PES_filt <- PES_filt %>%
                tidyr::pivot_longer(!GeneID, names_to = "Stage", values_to = "pTAI")
        
        # make factor
        PES_filt$Stage <- base::factor(PES_filt$Stage, ordered_stages)
        return(PES_filt)
}

Plot pMatrix

unicellular_pMat_plot <- function(PES, 
                                  unicell_names = c("meiospore", "gamete", "mitospore"), 
                                  n_genes = 100,
                                  stage_select = "gamete",
                                  transformation = "log2",
                                  alpha = 0,
                                  dropzero = F,
                                  ordered_stages) {
        top_pTAI <- PES %>% 
                unicellular_pMat(dropzero = dropzero, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = stage_select, n_genes = n_genes)
        
        PES_filt <- unicellular_pMat(PES = PES, unicell_names = unicell_names, ordered_stages = ordered_stages)
        # Rest of your function logic
        # ...
        
        # transformation
        f <- match.fun(transformation)
        
        
        plot_re <- PES_filt %>%
                ggplot2::ggplot(aes(y = f(pTAI + alpha), x = Stage, fill = Stage)) +
                see::geom_violinhalf(trim = TRUE, width=1.2, alpha = 0.8) + 
                ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white")) +
                ggplot2::geom_line(
                        data = top_pTAI, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.2,
                        colour = "#3C5488FF") +
                ggplot2::geom_point(
                        data = top_pTAI, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.3,
                        size = 0.7,
                        colour = "#3C5488FF") +
                ggplot2::ylab(paste0(transformation, "(pTAI)")) +
                
                ggplot2::theme_classic() +
                ggplot2::theme(legend.position="none") +
                ggplot2::coord_flip()
  
  return(plot_re)
}

unicellular_pMat_plot_all <- function(PES, 
                                  unicell_names = c("meiospore", "gamete", "mitospore"), 
                                  n_genes = 100,
                                  transformation = "log2",
                                  alpha = 0,
                                  dropzero = F,
                                  ordered_stages) {
        top_pTAI_1 <- PES %>% 
                unicellular_pMat(dropzero = dropzero, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = unicell_names[[1]], n_genes = n_genes)
        top_pTAI_2 <- PES %>% 
                unicellular_pMat(dropzero = dropzero, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = unicell_names[[2]], n_genes = n_genes)
        top_pTAI_3 <- PES %>% 
                unicellular_pMat(dropzero = dropzero, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = unicell_names[[3]], n_genes = n_genes)
        
        top_pTAI_all <- top_pTAI_1 %>%
                inner_join(top_pTAI_2) %>%
                inner_join(top_pTAI_3)
        
        PES_filt <- unicellular_pMat(PES = PES, unicell_names = unicell_names, ordered_stages = ordered_stages)
        # Rest of your function logic
        # ...
        
        # transformation
        f <- match.fun(transformation)
        
        
        plot_re <- PES_filt %>%
                ggplot2::ggplot(aes(y = f(pTAI + alpha), x = Stage, fill = Stage)) +
                see::geom_violinhalf(trim = TRUE, width=1.2, alpha = 0.5) + 
                ggplot2::scale_fill_manual(values = c("#3C5488FF", "#E64B35FF", "#00A087FF")) +
                ggplot2::geom_line(
                        data = top_pTAI_1, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.1,
                        size = 0.3,
                        colour = "#3C5488FF") +
                ggplot2::geom_point(
                        data = top_pTAI_1, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.1,
                        size = 0.7,
                        colour = "#3C5488FF") +
                ggplot2::geom_line(
                        data = top_pTAI_2, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.1,
                        size = 0.3,
                        colour = "#E64B35FF") +
                ggplot2::geom_point(
                        data = top_pTAI_3, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.1,
                        size = 0.7,
                        colour = "#E64B35FF") +
                ggplot2::geom_line(
                        data = top_pTAI_3, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.1,
                        size = 0.3,
                        colour = "#00A087FF") +
                ggplot2::geom_point(
                        data = top_pTAI_3, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.1,
                        size = 0.7,
                        colour = "#00A087FF") +
                ggplot2::geom_line(
                        data = top_pTAI_all, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.5,
                        colour = "black") +
                ggplot2::ylab(paste0(transformation, "(pTAI)")) +
                
                ggplot2::theme_classic() +
                ggplot2::theme(legend.position="none") +
                ggplot2::coord_flip()
  
  return(plot_re)
}

pMatrix calculations

Before removing 0s

ordered_stages <- c("gamete", "meiospore", "mitospore")

unicellular_pMat_plot(Ec_PES_32m, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "raw(TPM)")
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_25f, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "raw(TPM)")
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.log2, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "log2(TPM+1)")
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_25f.log2, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "log2(TPM+1)")
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.rank, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rank(TPM)")

unicellular_pMat_plot(Ec_PES_25f.rank, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rank(TPM)")

unicellular_pMat_plot(Ec_PES_32m.rlog, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rlog(TPM)")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 9018 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 10 rows containing missing values (`geom_point()`).

unicellular_pMat_plot(Ec_PES_25f.rlog, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rlog(TPM)")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 9203 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 7 rows containing missing values (`geom_point()`).

unicellular_pMat_plot_all(Ec_PES_32m, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "raw(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot_all(Ec_PES_25f, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "raw(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot_all(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot_all(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot_all(Ec_PES_32m.log2, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "log2(TPM+1)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot_all(Ec_PES_25f.log2, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "log2(TPM+1)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot_all(Ec_PES_32m.rank, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rank(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`

unicellular_pMat_plot_all(Ec_PES_25f.rank, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rank(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`

unicellular_pMat_plot_all(Ec_PES_32m.rlog, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rlog(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 9018 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 21 rows containing missing values (`geom_line()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).

unicellular_pMat_plot_all(Ec_PES_25f.rlog, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rlog(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 9203 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 23 rows containing missing values (`geom_line()`).
## Warning: Removed 23 rows containing missing values (`geom_point()`).
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## Warning: Removed 4 rows containing missing values (`geom_line()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).

After removing 0s

unicellular_pMat_plot(Ec_PES_32m, dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "raw(TPM)")
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_25f, dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "raw(TPM)")
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.sqrt, dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_25f.sqrt, 
                      dropzero = TRUE, 
                      ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.log2, dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "log2(TPM+1)")
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_25f.log2, dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "log2(TPM+1)")
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.rank, dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rank(TPM)")

unicellular_pMat_plot(Ec_PES_25f.rank, dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rank(TPM)")

unicellular_pMat_plot(Ec_PES_32m.rlog, dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rlog(TPM)")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 9018 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 10 rows containing missing values (`geom_point()`).

unicellular_pMat_plot(Ec_PES_25f.rlog, dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rlog(TPM)")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 9203 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 7 rows containing missing values (`geom_point()`).

Not using gametes

Before removing 0s

unicellular_pMat_plot(Ec_PES_25f.sqrt,stage_select = "mitospore", ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)") +
        ggplot2::scale_fill_manual(values = c("white", "white", "#3C5488FF"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.sqrt,stage_select = "mitospore", ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)") +
        ggplot2::scale_fill_manual(values = c("white", "white", "#3C5488FF"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_25f.sqrt,stage_select = "meiospore", ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)") +
        ggplot2::scale_fill_manual(values = c("white", "#3C5488FF", "white"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.sqrt,stage_select = "meiospore", ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)") +
        ggplot2::scale_fill_manual(values = c("white", "#3C5488FF", "white"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

After removing 0s

unicellular_pMat_plot(Ec_PES_25f.sqrt,stage_select = "mitospore", dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)") +
        ggplot2::scale_fill_manual(values = c("white", "white", "#3C5488FF"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.sqrt,stage_select = "mitospore", dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)") +
        ggplot2::scale_fill_manual(values = c("white", "white", "#3C5488FF"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_25f.sqrt,stage_select = "meiospore", dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)") +
        ggplot2::scale_fill_manual(values = c("white", "#3C5488FF", "white"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Removed 14016 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot(Ec_PES_32m.sqrt,stage_select = "meiospore", dropzero = TRUE, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)") +
        ggplot2::scale_fill_manual(values = c("white", "#3C5488FF", "white"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

Multicellular enrichment profiles

multicellular_pMat_plot <- function(PES, 
                                  multicell_names = c("immGA", "matGA", "oldGA", "earlyPSP", "immPSP", "matPSP"), 
                                  n_genes = 100,
                                  stage_select = "immGA",
                                  transformation = "log2",
                                  alpha = 0,
                                  dropzero = F,
                                  ordered_stages) {
        top_pTAI <- PES %>% 
                unicellular_pMat(dropzero = dropzero, unicell_names = multicell_names, ordered_stages = ordered_stages, top_genes = TRUE, stage_select = stage_select, n_genes = n_genes)
        
        PES_filt <- unicellular_pMat(PES = PES, unicell_names = multicell_names, ordered_stages = ordered_stages)
        
        # transformation
        f <- match.fun(transformation)
        
        plot_re <- PES_filt %>%
                ggplot2::ggplot(aes(y = f(pTAI + alpha), x = Stage, fill = Stage)) +
                see::geom_violinhalf(trim = TRUE, width=1.2, alpha = 0.8) + 
                ggplot2::geom_line(
                        data = top_pTAI, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.2,
                        colour = "#3C5488FF") +
                ggplot2::geom_point(
                        data = top_pTAI, 
                        aes(y = f(pTAI + alpha), x = Stage, group = GeneID),
                        alpha = 0.3,
                        size = 0.7,
                        colour = "#3C5488FF") +
                ggplot2::ylab(paste0(transformation, "(pTAI)")) +
                
                ggplot2::theme_classic() +
                ggplot2::theme(legend.position="none") +
                ggplot2::coord_flip()
  
  return(plot_re)
}

Before removing 0s

ordered_stages = c("immGA", "matGA", "oldGA", "earlyPSP", "immPSP", "matPSP")

multicellular_pMat_plot(Ec_PES_32m.sqrt, stage_select = "immGA", ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 2817 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_25f.sqrt, stage_select = "immGA", ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 2285 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_32m, stage_select = "immGA", ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "raw(TPM)")
## Warning: Removed 2817 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_25f, stage_select = "immGA", ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "raw(TPM)")
## Warning: Removed 2285 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_32m.rank, stage_select = "immGA", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rank(TPM)")

multicellular_pMat_plot(Ec_PES_25f.rank, stage_select = "immGA", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rank(TPM)")

multicellular_pMat_plot(Ec_PES_32m.log2, stage_select = "immGA", ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "log2(TPM+1)")
## Warning: Removed 2817 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_25f.log2, stage_select = "immGA", ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "log2(TPM+1)")
## Warning: Removed 2285 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_32m.rlog, stage_select = "immGA", ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rlog(TPM)")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 5084 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_25f.rlog, stage_select = "immGA", ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rlog(TPM)")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 4756 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 6 rows containing missing values (`geom_line()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).

After removing 0s

ordered_stages = c("immGA", "matGA", "oldGA", "earlyPSP", "immPSP", "matPSP")

multicellular_pMat_plot(Ec_PES_32m.sqrt, stage_select = "immGA", ordered_stages = ordered_stages, dropzero = TRUE) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 2817 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_25f.sqrt, stage_select = "immGA", ordered_stages = ordered_stages, dropzero = TRUE) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 2285 rows containing non-finite values (`stat_ydensity()`).

What about with gametes??

Before removing 0s

ordered_stages = c("gamete", "immGA", "matGA", "oldGA", "earlyPSP", "immPSP", "matPSP")

multicellular_pMat_plot(Ec_PES_32m.sqrt, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 3718 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_25f.sqrt, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 3542 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_32m, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "raw(TPM)")
## Warning: Removed 3718 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_25f, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "raw(TPM)")
## Warning: Removed 3542 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_32m.rank, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rank(TPM)")

multicellular_pMat_plot(Ec_PES_25f.rank, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rank(TPM)")

multicellular_pMat_plot(Ec_PES_32m.log2, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "log2(TPM+1)")
## Warning: Removed 3718 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_25f.log2, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "log2(TPM+1)")
## Warning: Removed 3542 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_32m.rlog, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "rlog(TPM)")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 7015 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 4 rows containing missing values (`geom_line()`).
## Warning: Removed 20 rows containing missing values (`geom_point()`).

multicellular_pMat_plot(Ec_PES_25f.rlog, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "rlog(TPM)")
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced

## Warning in FUN(X[[i]], ...): NaNs produced
## Warning: Removed 7023 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).

After removing 0s

ordered_stages = c("gamete", "immGA", "matGA", "oldGA", "earlyPSP", "immPSP", "matPSP")

multicellular_pMat_plot(Ec_PES_32m.sqrt, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages, dropzero = TRUE) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec32m",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 3718 rows containing non-finite values (`stat_ydensity()`).

multicellular_pMat_plot(Ec_PES_25f.sqrt, stage_select = "gamete", multicell_names = ordered_stages, ordered_stages = ordered_stages, dropzero = TRUE) + 
        ggplot2::scale_fill_manual(values = c("#3C5488FF", "white", "white", "white", "white", "white", "white", "white", "white")) +
        ggplot2::labs(title = "Ec25f",
                     subtitle = "sqrt(TPM)")
## Warning: Removed 3542 rows containing non-finite values (`stat_ydensity()`).

How many top genes are shared?

intersection_genes <- function(PES, n_genes = 100, ordered_stages, raw_out = FALSE){

        if(!is.data.frame(PES))
                stop("PES is not a dataframe")
        
        PES_filt <- unicellular_pMat(PES = PES, unicell_names = ordered_stages, ordered_stages = ordered_stages)
        
        stage_filter <- NULL
        top_pMat <- tibble::tibble()
        for (i in 1:length(ordered_stages)) {
                stage_filter <- ordered_stages[[i]]
                
                filtered_pMatrix <- PES_filt %>%
                        dplyr::filter(Stage == stage_filter) %>% 
                        dplyr::slice_max(pTAI, n = n_genes)
                top_pMat <- rbind(top_pMat, filtered_pMatrix)
        }
        
        if(raw_out){
                return(top_pMat)
        }
        
        top_pMat_out <- top_pMat %>% 
                dplyr::count(GeneID) %>% 
                dplyr::filter(n == length(ordered_stages))
        
        # top_pMat_out <- top_pMat_out %>% nrow()
        
        return(top_pMat_out)
}
ordered_stages <- c("gamete", "meiospore", "mitospore")

intersection_genes(Ec_PES_32m, ordered_stages = ordered_stages) %>% nrow()
## [1] 22
intersection_genes(Ec_PES_25f, ordered_stages = ordered_stages) %>% nrow()
## [1] 18
intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 23
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 19
intersection_genes(Ec_PES_32m.log2, ordered_stages = ordered_stages) %>% nrow()
## [1] 23
intersection_genes(Ec_PES_25f.log2, ordered_stages = ordered_stages) %>% nrow()
## [1] 21
intersection_genes(Ec_PES_32m.rank, ordered_stages = ordered_stages) %>% nrow()
## [1] 26
intersection_genes(Ec_PES_25f.rank, ordered_stages = ordered_stages) %>% nrow()
## [1] 22
intersection_genes(Ec_PES_32m.rlog, ordered_stages = ordered_stages) %>% nrow()
## [1] 35
intersection_genes(Ec_PES_25f.rlog, ordered_stages = ordered_stages) %>% nrow()
## [1] 29
ordered_stages <- c("immGA", "matGA", "oldGA")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 40
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 53
ordered_stages <- c("earlyPSP", "immPSP", "matPSP")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 50
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 40
ordered_stages <- c("gamete", "immGA")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 20
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 29
ordered_stages <- c("gamete", "immPSP")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 22
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 24
ordered_stages <- c("immPSP", "immGA")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 73
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 69
ordered_stages <- c("meiospore", "immPSP")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 27
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 22
ordered_stages <- c("gamete", "meiospore")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 25
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 23
ordered_stages <- c("mitospore", "immPSP")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 26
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 28
ordered_stages <- c("gamete", "mitospore")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 47
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 43
ordered_stages <- c("meiospore", "mitospore")

intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 42
intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% nrow()
## [1] 37
# ordered_stages <- c("gamete", "meiospore", "mitospore")
# 
# intersection_genes(Ec_PES_32m, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_32m_unicell.csv")
# intersection_genes(Ec_PES_25f, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_25f_unicell.csv")
# intersection_genes(Ec_PES_32m.sqrt, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_32m.sqrt_unicell.csv")
# intersection_genes(Ec_PES_25f.sqrt, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_25f.sqrt_unicell.csv")
# intersection_genes(Ec_PES_32m.log2, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_32m.log2_unicell.csv")
# intersection_genes(Ec_PES_25f.log2, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_25f.log2_unicell.csv")
# intersection_genes(Ec_PES_32m.rank, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_32m.rank_unicell.csv")
# intersection_genes(Ec_PES_25f.rank, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_25f.rank_unicell.csv")
# intersection_genes(Ec_PES_32m.rlog, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_32m.rlog_unicell.csv")
# intersection_genes(Ec_PES_25f.rlog, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_25f.rlog_unicell.csv")

Further analysis using denoised data.

Load PES (denoised)

Non-transformed

Ec_PES_32m_denoised <-
  readr::read_csv(file = "data/Ec_PES_32m_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f_denoised <-
  readr::read_csv(file = "data/Ec_PES_25f_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

sqrt-tranformed

Ec_PES_32m.sqrt_denoised <-
  readr::read_csv(file = "data/Ec_PES_32m.sqrt_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.sqrt_denoised <-
  readr::read_csv(file = "data/Ec_PES_25f.sqrt_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

log2-tranformed

Ec_PES_32m.log2_denoised <-
  readr::read_csv(file = "data/Ec_PES_32m.log2_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.log2_denoised <-
  readr::read_csv(file = "data/Ec_PES_25f.log2_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): PS, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, immPSP, matP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ordered_stages <- c("gamete", "meiospore", "mitospore")

unicellular_pMat_plot(Ec_PES_32m_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m denoised",
                     subtitle = "raw(TPM)")

unicellular_pMat_plot(Ec_PES_25f_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f denoised",
                     subtitle = "raw(TPM)")

unicellular_pMat_plot(Ec_PES_32m.sqrt_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m denoised",
                     subtitle = "sqrt(TPM)")

unicellular_pMat_plot(Ec_PES_25f.sqrt_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f denoised",
                     subtitle = "sqrt(TPM)")

unicellular_pMat_plot(Ec_PES_32m.log2_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m denoised",
                     subtitle = "log2(TPM+1)")

unicellular_pMat_plot(Ec_PES_25f.log2_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f denoised",
                     subtitle = "log2(TPM+1)")

unicellular_pMat_plot_all(Ec_PES_32m, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m",
                     subtitle = "raw(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Warning: Removed 14980 rows containing non-finite values (`stat_ydensity()`).

unicellular_pMat_plot_all(Ec_PES_32m_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m denoised",
                     subtitle = "raw(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`

unicellular_pMat_plot_all(Ec_PES_25f_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f denoised",
                     subtitle = "raw(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`

unicellular_pMat_plot_all(Ec_PES_32m.sqrt_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m denoised",
                     subtitle = "sqrt(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`

unicellular_pMat_plot_all(Ec_PES_25f.sqrt_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f denoised",
                     subtitle = "sqrt(TPM)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`

unicellular_pMat_plot_all(Ec_PES_32m.log2_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec32m denoised",
                     subtitle = "log2(TPM+1)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`

unicellular_pMat_plot_all(Ec_PES_25f.log2_denoised, ordered_stages = ordered_stages) + 
        ggplot2::labs(title = "Ec25f denoised",
                     subtitle = "log2(TPM+1)")
## Joining with `by = join_by(GeneID, Stage, pTAI)`
## Joining with `by = join_by(GeneID, Stage, pTAI)`

Intersection genes

ordered_stages <- c("gamete", "meiospore", "mitospore")

intersection_genes(Ec_PES_32m_denoised, ordered_stages = ordered_stages) %>% nrow()
## [1] 23
intersection_genes(Ec_PES_25f_denoised, ordered_stages = ordered_stages) %>% nrow()
## [1] 14
intersection_genes(Ec_PES_32m.sqrt_denoised, ordered_stages = ordered_stages) %>% nrow()
## [1] 18
intersection_genes(Ec_PES_25f.sqrt_denoised, ordered_stages = ordered_stages) %>% nrow()
## [1] 16
intersection_genes(Ec_PES_32m.log2_denoised, ordered_stages = ordered_stages) %>% nrow()
## [1] 35
intersection_genes(Ec_PES_25f.log2_denoised, ordered_stages = ordered_stages) %>% nrow()
## [1] 30
# intersection_genes(Ec_PES_32m_denoised, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_32m_denoised_unicell.csv")
# intersection_genes(Ec_PES_25f_denoised, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_25f_denoised_unicell.csv")
# intersection_genes(Ec_PES_32m.sqrt_denoised, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_32m.sqrt_denoised_unicell.csv")
# intersection_genes(Ec_PES_25f.sqrt_denoised, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_25f.sqrt_denoised_unicell.csv")
# intersection_genes(Ec_PES_32m.log2_denoised, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_32m.log2_denoised_unicell.csv")
# intersection_genes(Ec_PES_25f.log2_denoised, ordered_stages = ordered_stages) %>% select(1) %>% write_csv("data/pTAI/Ec_PES_25f.log2_denoised_unicell.csv")

We are left with a similar amount of genes.

Get session info.

devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.2.2 (2022-10-31)
##  os       macOS Big Sur ... 10.16
##  system   x86_64, darwin17.0
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Europe/Berlin
##  date     2024-01-16
##  pandoc   3.1.6.2 @ /usr/local/bin/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package     * version    date (UTC) lib source
##  bit           4.0.5      2022-11-15 [1] CRAN (R 4.2.0)
##  bit64         4.0.5      2020-08-30 [1] CRAN (R 4.2.0)
##  bslib         0.5.1      2023-08-11 [1] CRAN (R 4.2.0)
##  cachem        1.0.8      2023-05-01 [1] CRAN (R 4.2.0)
##  callr         3.7.3      2022-11-02 [1] CRAN (R 4.2.0)
##  cli           3.6.1      2023-03-23 [1] CRAN (R 4.2.0)
##  codetools     0.2-19     2023-02-01 [1] CRAN (R 4.2.0)
##  colorspace    2.1-0      2023-01-23 [1] CRAN (R 4.2.0)
##  crayon        1.5.2      2022-09-29 [1] CRAN (R 4.2.0)
##  devtools      2.4.5      2022-10-11 [1] CRAN (R 4.2.0)
##  digest        0.6.33     2023-07-07 [1] CRAN (R 4.2.0)
##  dplyr       * 1.1.3      2023-09-03 [1] CRAN (R 4.2.0)
##  ellipsis      0.3.2      2021-04-29 [1] CRAN (R 4.2.0)
##  evaluate      0.22       2023-09-29 [1] CRAN (R 4.2.2)
##  fansi         1.0.5      2023-10-08 [1] CRAN (R 4.2.2)
##  farver        2.1.1      2022-07-06 [1] CRAN (R 4.2.0)
##  fastmap       1.1.1      2023-02-24 [1] CRAN (R 4.2.0)
##  forcats     * 1.0.0      2023-01-29 [1] CRAN (R 4.2.0)
##  foreach       1.5.2      2022-02-02 [1] CRAN (R 4.2.0)
##  fs            1.6.3      2023-07-20 [1] CRAN (R 4.2.0)
##  generics      0.1.3      2022-07-05 [1] CRAN (R 4.2.0)
##  ggplot2     * 3.4.4      2023-10-12 [1] CRAN (R 4.2.2)
##  glue          1.6.2      2022-02-24 [1] CRAN (R 4.2.0)
##  gtable        0.3.4      2023-08-21 [1] CRAN (R 4.2.0)
##  hms           1.1.3      2023-03-21 [1] CRAN (R 4.2.0)
##  htmltools     0.5.6.1    2023-10-06 [1] CRAN (R 4.2.2)
##  htmlwidgets   1.6.2      2023-03-17 [1] CRAN (R 4.2.0)
##  httpuv        1.6.11     2023-05-11 [1] CRAN (R 4.2.2)
##  insight       0.19.6     2023-10-12 [1] CRAN (R 4.2.2)
##  iterators     1.0.14     2022-02-05 [1] CRAN (R 4.2.0)
##  jquerylib     0.1.4      2021-04-26 [1] CRAN (R 4.2.0)
##  jsonlite      1.8.7      2023-06-29 [1] CRAN (R 4.2.0)
##  knitr         1.44       2023-09-11 [1] CRAN (R 4.2.0)
##  labeling      0.4.3      2023-08-29 [1] CRAN (R 4.2.0)
##  later         1.3.1      2023-05-02 [1] CRAN (R 4.2.2)
##  lattice       0.21-9     2023-10-01 [1] CRAN (R 4.2.2)
##  lifecycle     1.0.3      2022-10-07 [1] CRAN (R 4.2.0)
##  lubridate   * 1.9.3      2023-09-27 [1] CRAN (R 4.2.0)
##  magrittr      2.0.3      2022-03-30 [1] CRAN (R 4.2.0)
##  Matrix        1.5-4.1    2023-05-18 [1] CRAN (R 4.2.0)
##  memoise       2.0.1      2021-11-26 [1] CRAN (R 4.2.0)
##  mime          0.12       2021-09-28 [1] CRAN (R 4.2.0)
##  miniUI        0.1.1.1    2018-05-18 [1] CRAN (R 4.2.0)
##  munsell       0.5.0      2018-06-12 [1] CRAN (R 4.2.0)
##  myTAI       * 1.0.1.9000 2023-12-07 [1] Github (drostlab/myTAI@27a2639)
##  pillar        1.9.0      2023-03-22 [1] CRAN (R 4.2.0)
##  pkgbuild      1.4.2      2023-06-26 [1] CRAN (R 4.2.0)
##  pkgconfig     2.0.3      2019-09-22 [1] CRAN (R 4.2.0)
##  pkgload       1.3.3      2023-09-22 [1] CRAN (R 4.2.0)
##  prettyunits   1.2.0      2023-09-24 [1] CRAN (R 4.2.0)
##  processx      3.8.2      2023-06-30 [1] CRAN (R 4.2.0)
##  profvis       0.3.8      2023-05-02 [1] CRAN (R 4.2.0)
##  promises      1.2.1      2023-08-10 [1] CRAN (R 4.2.2)
##  ps            1.7.5      2023-04-18 [1] CRAN (R 4.2.0)
##  purrr       * 1.0.2      2023-08-10 [1] CRAN (R 4.2.2)
##  R6            2.5.1      2021-08-19 [1] CRAN (R 4.2.0)
##  Rcpp          1.0.11     2023-07-06 [1] CRAN (R 4.2.0)
##  readr       * 2.1.4      2023-02-10 [1] CRAN (R 4.2.0)
##  remotes       2.4.2.1    2023-07-18 [1] CRAN (R 4.2.2)
##  rlang         1.1.1      2023-04-28 [1] CRAN (R 4.2.0)
##  rmarkdown     2.25       2023-09-18 [1] CRAN (R 4.2.2)
##  rstudioapi    0.15.0     2023-07-07 [1] CRAN (R 4.2.0)
##  sass          0.4.7      2023-07-15 [1] CRAN (R 4.2.0)
##  scales        1.2.1      2022-08-20 [1] CRAN (R 4.2.0)
##  see         * 0.8.0      2023-06-05 [1] CRAN (R 4.2.0)
##  sessioninfo   1.2.2      2021-12-06 [1] CRAN (R 4.2.0)
##  shiny         1.7.5.1    2023-10-14 [1] CRAN (R 4.2.2)
##  stringi       1.7.12     2023-01-11 [1] CRAN (R 4.2.0)
##  stringr     * 1.5.0      2022-12-02 [1] CRAN (R 4.2.0)
##  tibble      * 3.2.1      2023-03-20 [1] CRAN (R 4.2.0)
##  tidyr       * 1.3.0      2023-01-24 [1] CRAN (R 4.2.0)
##  tidyselect    1.2.0      2022-10-10 [1] CRAN (R 4.2.0)
##  tidyverse   * 2.0.0      2023-02-22 [1] CRAN (R 4.2.0)
##  timechange    0.2.0      2023-01-11 [1] CRAN (R 4.2.0)
##  tzdb          0.4.0      2023-05-12 [1] CRAN (R 4.2.2)
##  urlchecker    1.0.1      2021-11-30 [1] CRAN (R 4.2.0)
##  usethis       2.2.2      2023-07-06 [1] CRAN (R 4.2.0)
##  utf8          1.2.3      2023-01-31 [1] CRAN (R 4.2.0)
##  vctrs         0.6.4      2023-10-12 [1] CRAN (R 4.2.2)
##  vroom         1.6.4      2023-10-02 [1] CRAN (R 4.2.2)
##  withr         2.5.1      2023-09-26 [1] CRAN (R 4.2.0)
##  xfun          0.40       2023-08-09 [1] CRAN (R 4.2.2)
##  xtable        1.8-4      2019-04-21 [1] CRAN (R 4.2.0)
##  yaml          2.3.7      2023-01-23 [1] CRAN (R 4.2.0)
## 
##  [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────